# encoding:utf-8
import requests
from bs4 import BeautifulSoup
import re
from openpyxl import Workbook
import time
import random

def page(first_page=1, last_page=53):
    """Crawl the foreclosure listing index pages and write all rows to Excel.

    Creates a new workbook with a Chinese header row, then fetches each
    paginated index page and delegates per-listing work to ``start``.

    Args:
        first_page: First index page to fetch (inclusive). Defaults to 1.
        last_page: Last index page to fetch (inclusive). The defaults
            reproduce the original hard-coded range 1..53.
    """
    url = 'https://cq.esf.fang.com/fapai/'
    # Create a new workbook and use its default active worksheet.
    wb = Workbook()
    ws = wb.active

    # Header row; the column titles are part of the produced file, keep as-is.
    ws.append(['房屋地址', '图片链接', '保证金', '起拍价', '加价幅度', '单价', '折价率', '面积', '拍卖方式', '延时周期',
               '优先购买权人'])

    for i in range(first_page, last_page + 1):
        # Index pages are paginated as .../fapai/i3<N>.
        new_url = f'{url}i3{i}'
        print(new_url)
        start(wb, ws, new_url)
        # Random delay between index pages to avoid hammering the server.
        delay = random.uniform(1, 5)
        print(f"即将延迟 {delay} 秒...")
        time.sleep(delay)

#@title Crawl-data entry for one index page
#@author 罗伟
#@date 2025-03-13 15:44
def start(wb, ws, url):
    """Fetch one index page, scrape every listing on it, append rows to *ws*.

    For each listing the detail URL and thumbnail are extracted, the detail
    page is read via ``read_page``, and the resulting row is appended and the
    workbook saved immediately (so partial progress survives a crash).

    Args:
        wb: openpyxl Workbook being written (saved after every row).
        ws: active worksheet rows are appended to.
        url: full URL of one paginated index page.
    """
    # timeout prevents the crawler from hanging forever on a stalled request
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f'请求失败，状态码: {response.status_code}')
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    # Listings live inside the div with class 'shop_list'.
    list_container = soup.find('div', class_='shop_list')
    if list_container is None:
        # Layout change or anti-bot page: skip this page instead of crashing.
        print(f'未找到房源列表: {url}')
        return

    for item in list_container.find_all('dt'):
        anchor = item.find('a')
        img_tag = item.find('img', class_='loadimg')
        if anchor is None or img_tag is None:
            # Malformed entry; ignore it rather than raise AttributeError.
            continue
        href = anchor.attrs.get('href')
        img = img_tag.attrs.get('data-src')
        if not href:
            continue
        # Do not shadow the 'url' parameter: use a distinct local name.
        detail_url = f'https://cq.esf.fang.com{href}'
        print(detail_url)
        print(img)
        result = read_page(img, detail_url)
        ws.append(result)
        # NOTE(review): output path is hard-coded; consider parameterizing.
        wb.save("/Users/luowei/Downloads/output.xlsx")
        # Random delay between detail fetches to stay polite.
        delay = random.uniform(0.5, 3)
        print(f"即将延迟 {delay} 秒...")
        time.sleep(delay)
        print("延迟结束")


#@title Read one listing detail page
#@author 罗伟
#@date 2025-03-13 16:29
def read_page(img, url):
    """Fetch a listing detail page and return its fields as a flat list.

    Args:
        img: thumbnail image URL already extracted from the index page;
            stored as the row's second column.
        url: full URL of the detail page.

    Returns:
        list: [title, img, field1, field2, ...] in page order; may be
        shorter (or empty) when the request fails or tags are missing,
        matching the original best-effort behaviour.
    """
    my_list = []
    # timeout prevents a stalled request from hanging the whole crawl
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f'请求失败，状态码: {response.status_code}')
        return my_list

    soup = BeautifulSoup(response.text, 'html.parser')
    # Title block; guard against layout changes instead of crashing.
    tit_fpf = soup.find('div', class_='tit_fpf')
    if tit_fpf is None or tit_fpf.find('h1') is None:
        print(f'未找到标题: {url}')
        return my_list
    title = remove_house_if_endswith(tit_fpf.find('h1').text)
    my_list.append(title)
    my_list.append(img)

    # Auction info: one <li> per field, label in <span>, value in <p>.
    base_info_fpf = soup.find('div', class_='base_info_fpf')
    if base_info_fpf is not None:
        for li in base_info_fpf.find_all('li'):
            span = li.find('span')
            p = li.find('p')
            if span is None or p is None:
                continue  # skip malformed entries
            name = span.text.strip()
            text = p.text.strip()
            print(f'{name}----{text}')
            my_list.append(text)
    return my_list

def remove_house_if_endswith(text):
    """Return *text* with a trailing "房屋" suffix removed, if present."""
    suffix = "房屋"
    return text[:-len(suffix)] if text.endswith(suffix) else text

if __name__ == '__main__':
    # Script entry point: crawl every index page and write the Excel output.
    print('开始爬取......')
    page()
    # Debug helper kept for convenience: fetch a single detail page directly.
    # retuls = read_page('','https://cq.esf.fang.com/fapai/out_10755548.html')
    # print(retuls)
    print('结束爬取......')